Now let’s get plotting! I want to look intially for each year at the number of calls per tract versus other measures of the tract. Let’s start with 2010:

nyc_2010 %>% 
  drop_na(calls_per_person, total_pop) %>% 
  ggplot(mapping = aes(x = total_pop, y = calls_per_person)) +
  geom_point()

Looking at this, we want to remove some outliers. Let’s suppose that we are only going to look at tracts that have more than 2000 residents.

nyc_2010 %>% 
  filter(total_pop > 2000) %>% 
  ggplot(mapping = aes(x = total_pop, y = calls_per_person)) +
  geom_point()

Now let’s measure versus other tract measurements. We have a few to use:

Average rent:

nyc_2010 %>% 
  filter(total_pop > 2000) %>% 
  mutate(avg_rent = na_if(avg_rent, NaN)) %>% 
  drop_na(avg_rent, calls_per_person) %>% 
  ggplot(mapping = aes(x = avg_rent, y = calls_per_person)) +
  geom_point()

Percent white:

nyc_2010 %>% 
  filter(total_pop > 2000) %>% 
  ggplot(mapping = aes(x = percent_white, y = calls_per_person)) +
  geom_point()

Median household income:

nyc_2010 %>% 
  filter(total_pop > 2000) %>% 
  drop_na(calls_per_person, med_income) %>% 
  ggplot(mapping = aes(x = med_income, y = calls_per_person)) +
  geom_point()

What I’m most interested in is the change from years in terms of calls.

reform_df <- function(data, year){
  data %>% 
    mutate(
      year = year,
      rent_increase = if_else(
        (nyc_2017$avg_rent > avg_rent),
        true = TRUE,
        false = FALSE
      ),
      white_increase = if_else(
        (nyc_2017$percent_white > percent_white),
        true = TRUE,
        false = FALSE
      ),
      income_increase = if_else(
        (nyc_2017$med_income > med_income),
        true = TRUE,
        false = FALSE
      )
    ) %>% 
    select(-geometry, -num_calls)
}

nyc_2010 <- reform_df(nyc_2010, 2010)
nyc_2011 <- reform_df(nyc_2011, 2011)
nyc_2012 <- reform_df(nyc_2012, 2012)
nyc_2013 <- reform_df(nyc_2013, 2013)
nyc_2014 <- reform_df(nyc_2014, 2014)
nyc_2015 <- reform_df(nyc_2015, 2015)
nyc_2016 <- reform_df(nyc_2016, 2016)
nyc_2017 <- reform_df(nyc_2017, 2017)

full_nyc <-
  nyc_2010 %>% 
  rbind(nyc_2011, nyc_2012, nyc_2013, nyc_2014, nyc_2015, nyc_2016, nyc_2017)

nyc_3pts <-
  rbind(nyc_2010, nyc_2014, nyc_2017)
full_nyc %>%   
  filter(total_pop > 2000, rent_increase == TRUE) %>% 
  mutate(avg_rent = na_if(avg_rent, NaN)) %>% 
  drop_na(avg_rent, calls_per_person) %>% 
  arrange(fips, year) %>% 
  ggplot(mapping = aes(x = avg_rent, y = calls_per_person)) +
  geom_point(aes(alpha = year), size = 2) +
  geom_path(aes(alpha = year), size = 1) +
  scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))

full_nyc %>%   
  filter(total_pop > 2000, white_increase == TRUE) %>% 
  drop_na(percent_white, calls_per_person) %>% 
  arrange(fips, year) %>% 
  ggplot(mapping = aes(x = percent_white, y = calls_per_person)) +
  geom_point(aes(alpha = year), size = 2) +
  geom_path(aes(alpha = year), size = 1) +
  scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))

full_nyc %>%   
  filter(total_pop > 2000, income_increase == TRUE) %>% 
  drop_na(med_income, calls_per_person) %>% 
  arrange(fips, year) %>% 
  ggplot(mapping = aes(x = med_income, y = calls_per_person)) +
  geom_point(aes(alpha = year), size = 2) +
  geom_path(aes(alpha = year), size = 1) +
  scale_alpha_continuous(breaks = seq(2010, 2017, 1), limits = c(2010, 2017))

inner_join(nyc_2010, nyc_2017, by = "fips") %>%
  filter(total_pop.x > 2000, avg_rent.y > avg_rent.x) %>% 
  mutate(avg_rent.x = na_if(avg_rent.x, NaN)) %>% 
  drop_na(avg_rent.x, calls_per_person.x) %>% 
  ggplot(mapping = aes(x = avg_rent.x, y = calls_per_person.x)) +
  geom_point(aes(color = "2010")) +
  geom_point(aes(x = avg_rent.y, y = calls_per_person.y, color = "2017")) +
  geom_segment(aes(xend = avg_rent.y, yend = calls_per_person.y))

inner_join(nyc_2010, nyc_2017, by = "fips") %>%
  filter(total_pop.x > 2000, percent_white.y > percent_white.x) %>% 
  drop_na(percent_white.x, calls_per_person.x) %>% 
  ggplot(mapping = aes(x = percent_white.x, y = calls_per_person.x)) +
  geom_point(aes(color = "2010")) +
  geom_point(aes(x = percent_white.y, y = calls_per_person.y, color = "2017")) +
  geom_segment(aes(xend = percent_white.y, yend = calls_per_person.y))

inner_join(nyc_2010, nyc_2017, by = "fips") %>%
  filter(total_pop.x > 2000, med_income.y > med_income.x) %>% 
  drop_na(med_income.x, calls_per_person.x) %>% 
  ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
  geom_point(aes(color = "2010")) +
  geom_point(aes(x = med_income.y, y = calls_per_person.y, color = "2017")) +
  geom_segment(aes(xend = med_income.y, yend = calls_per_person.y))

inner_join(nyc_2010, nyc_2017, by = "fips") %>%
  filter(total_pop.x > 2000, med_income.y > med_income.x) %>% 
  drop_na(med_income.x, calls_per_person.x) %>% 
  ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
  geom_segment(
    aes(xend = med_income.y, yend = calls_per_person.y),
    arrow = arrow(length = unit(0.2, "cm"))
  )

filter_data <- function(data){
  data %>% 
    filter(total_pop > 2000) %>% 
    drop_na(med_income)
}

filtered_2010 <- filter_data(nyc_2010)
filtered_2012 <- filter_data(nyc_2012)
filtered_2014 <- filter_data(nyc_2014)
filtered_2016 <- filter_data(nyc_2016)

filtered_2010 %>% 
  ggplot(aes(x = med_income, y = calls_per_person)) +
  geom_point(aes(color = "2010")) +
  geom_point(
    data = filtered_2012, 
    aes(x = med_income, y = calls_per_person, color = "2012")
  ) + 
  geom_point(
    data = filtered_2014, 
    aes(x = med_income, y = calls_per_person, color = "2014")
  ) + 
  geom_point(
    data = filtered_2016, 
    aes(x = med_income, y = calls_per_person, color = "2016")
  ) 

filtered_2010 %>% 
  select(fips, med_income_2010 = med_income, calls_2010 = calls_per_person) %>% 
  inner_join(
    filtered_2012 %>% 
      select(fips, med_income_2012 = med_income, calls_2012 = calls_per_person),
    by = "fips"
  ) %>% 
  inner_join(
    filtered_2014 %>% 
      select(fips, med_income_2014 = med_income, calls_2014 = calls_per_person),
    by = "fips"
  ) %>% 
  inner_join(
    filtered_2016 %>% 
      select(fips, med_income_2016 = med_income, calls_2016 = calls_per_person),
    by = "fips"
  ) %>% 
  ggplot() +
  geom_segment(
    aes(
      x = med_income_2010, 
      y = calls_2010, 
      xend = med_income_2012, 
      yend = calls_2012
    ),
    color = "light gray"
  ) +
  geom_segment(
    aes(
      x = med_income_2012, 
      y = calls_2012, 
      xend = med_income_2014, 
      yend = calls_2014
    ),
    color = "light gray"
  ) +
  geom_segment(
    aes(
      x = med_income_2014, 
      y = calls_2014, 
      xend = med_income_2016, 
      yend = calls_2016
    ),
    color = "light gray"
  ) +
  geom_point(aes(x = med_income_2010, y = calls_2010, color = "2010")) + 
  geom_point(aes(x = med_income_2012, y = calls_2012, color = "2012")) + 
  geom_point(aes(x = med_income_2014, y = calls_2014, color = "2014")) + 
  geom_point(aes(x = med_income_2016, y = calls_2016, color = "2016")) +
  scale_color_manual(
    values = c("red3", "orange3", "yellow3", "springgreen4")
  ) +
  theme()

inner_join(nyc_2010, nyc_2017, by = "fips") %>%
  filter(total_pop.x > 2000, med_income.y > med_income.x) %>% 
  drop_na(med_income.x, calls_per_person.x) %>% 
  ggplot(mapping = aes(x = med_income.x, y = calls_per_person.x)) +
  geom_segment(
    aes(xend = med_income.y, yend = calls_per_person.y),
    arrow = arrow(length = unit(0.2, "cm"))
  ) +
  geom_label(
    x = 200000, 
    y = 0.8, 
    label = "1 arrow = 1 census tract"
  ) +
  annotate(
    geom = "label",
    x = 203000,
    y = 0.5,
    label = "2010",
    color = "red",
    size = 3
  ) +
  annotate(
    geom = "label",
    x = 255000,
    y = 0.3,
    label = "2017",
    color = "red",
    size = 3
  ) +
  scale_x_continuous(labels = scales::dollar) +
  coord_fixed(ratio = 200000) +
  theme_classic() +
  labs(
    title = "311 Calls increase with Median Income from 2010 to 2017",
    subtitle = "Tracking 311 calls for Census Tracts in New York City",
    x = "Median Annual Income",
    y = "Calls per person",
    caption = "Source: NYC Open Data, American Fact Finder"
  )